/*

Purpose: This program revises the voting variables by using straight rounding rather than binning.
		b_mto_vote_match5_ids_v2 / b_mto_vote_match5_pseudo_v2 - posterior from match. rounded voting info ranging 0 to 1. 
				   multiple matches per participant
				   (all records that share one of the 5 highest unique posterior values for each participant)
		              three voting variables that range 0-1 and have been rounded 
			      
					posterior
					posterior_group_rank (1 to 5)
					r_pretreatturnout - pre RA voter turnout rounded by .1
					r_postturnout - post RA voter turnout rounded by .1
					r_postregturnout - post reg turnout rounded by .1
					(r_postregistration is computed below but is not kept in the saved files)


	To produce final voting file, the following rules are applied:

	Bin voting information:
		 Create postregistration = posttreatturnout / postregturnout
		 Round pretreatturnout, posttreatturnout, and postregistration by .1.
		 Cap uncommon values > 1 at 1.
	Limit matches:
		Sort by descending posterior match quality.
		Keep 5 highest unique match posteriors for each participant. (e.g., for one individual might keep
			posteriors of .99, .99, .80, .80, .80, .75, .73, .72, .72) 
 
	Revised 2/7/2021 - Do not set values > 1 to 1 unless original value was non-missing
	        3/31/2021 - Switch from bins to rounding.
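Steps (only steps 1 and 4 have code in this file; the crosswalk from step 2 is read as an existing file,
       and the output globals for steps 3 and 5 are commented out)
1. set globals
2. save out crosswalk
3. create weight file
4. create matched data with rounded voting variables and limited # of matches
5. create nonmatch file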

*/

********************************************
* 1. set globals
********************************************

// start clean: close any log left open by a previous run and empty memory
capture log close
clear

// new variables default to double precision
set type double

// run-date stamp used in the log and temp file names
global today 20210331

log using "~/mtoproj/m10_data/external_researchers/mendelberg/logs/03_create_voting_files_bv2_$today.log", text replace


// ---- variable lists (weight-file variables; not referenced in the steps below) ----
global keepfromwgt1 "i_svy_sample f_svy_sample2007 ra_site ra_group f_svy_core_imp f_svy_cmove f_svy_gender f_svy_release_ad f_wt_totsvy f_wt_totcore "
global keepfromwgt2 "f_wt_totcore98 f_wt_totsvy_ad f_svy_iwcompl_ad f_svy_iwcompl_yt "


// ---- input files ----
global xwalk "~/mtoproj/m10_data/external_researchers/mendelberg/mto_voter_xwalk.xlsx"
global votematch "~/mtoproj/m10_data/external_researchers/mendelberg/voting_results_deidentified/mto_voting_match_final_20200907.dta"
global novotematch "~/mtoproj/m10_data/external_researchers/mendelberg/voting_results_deidentified/mto_voting_unmatch_final_20200907dta.dta"
global wgtplus "~/mtoproj/m10_data/svy_analysis/wgtcov/mto_fnl_wgts_20120413plus.dta"

// ---- output directories ----
// files that carry actual MTO study IDs
global dir_mto_id "~/mtoproj/m10_data/external_researchers/mendelberg/mto_ids/"
// files that carry only the pseudo IDs created for the voting project
global dir_pseudo_id "~/mtoproj/m10_data/external_researchers/mendelberg/pseudo_ids/"

// ---- output files ----
// crosswalk between pseudo IDs and study IDs (read in step 4)
global outxwlk "${dir_mto_id}/mto_voter_xwalk2.dta"
global tempbins "~/mtoproj/m10_data/external_researchers/mendelberg/templink/tmp_vote_weights_bins_$today.dta"

// "a" files with weights (disabled)
*global outwgtids  "${dir_mto_id}/a_mto_fweights_vote_ids.dta"
*global outwgtpseudo  "${dir_pseudo_id}/a_mto_fweights_vote_pseudo.dta"

// "b" files with vote match: all matches, top-5 with IDs, top-5 with pseudo IDs only
global outvotefull "${dir_mto_id}/mto_vote_fullmatch_ids_v2.dta"
global outvoteids "${dir_mto_id}/b_mto_vote_match5_ids_v2.dta"
global outvotepseudo "${dir_pseudo_id}/b_mto_vote_match5_pseudo_v2.dta"

// "c" files with vote unmatch records (disabled)
*global outunmatchids  "${dir_mto_id}/c_mto_vote_unmatch_ids.dta"
*global outunmatchpseudo  "${dir_pseudo_id}/c_mto_vote_unmatch_pseudo.dta"


********************************************
* 4. create voting match data with bins. limit 5 posteriors.
********************************************

// load the voting match file and report how many records it holds
use "${votematch}", clear
count

// observation number of each record as it sits in the input file
gen temp_vote_obsnum = _n

label variable posterior "posterior from voting match"

// the three turnout fields arrive as strings; count the records whose value
// is neither empty nor the literal text NA
foreach rawvar in pretreatturnout posttreatturnout postregturnout {
	count if !mi(`rawvar') & `rawvar' != "NA"
}


// numeric copies of the string fields; real() yields missing for text that
// is not a number
gen double tmp_pretreatturnout = real(pretreatturnout)
gen double tmp_postturnout = real(posttreatturnout)
gen double tmp_postregturnout = real(postregturnout)

// post registration share = post turnout divided by post reg turnout
// (missing whenever either input is missing or the denominator is zero)
gen double tmp_postregistration = tmp_postturnout/tmp_postregturnout 


// rank posteriors
// Goal: within each participant (mtoid), number the DISTINCT posterior values
// from highest (rank 1) downward, then flag every record whose posterior is
// one of the five highest distinct values. Records that tie on a posterior
// share the same rank, so a participant can keep more than five records.

// reversed posterior, so an ascending sort lists the highest posterior first;
// a missing posterior stays missing and therefore sorts last
gen rev_posterior = 1 - posterior

// clear leftovers from an earlier run. Each name is dropped separately: a
// single drop listing several names does nothing at all when any one of them
// is absent. posterior_group_rank is included because an existing copy in
// memory would not be overwritten by the merge below.
foreach oldvar in grouped_ranks _mrggrprank posterior_group_rank {
	cap drop `oldvar'
}

preserve

	// one row per participant and distinct posterior value, ranked within participant
	keep mtoid posterior rev_posterior
	duplicates drop
	bys mtoid (rev_posterior): gen posterior_group_rank = _n
	tempfile grouprank
	save `grouprank'

restore

// attach the rank of each record's posterior value; every record should match
// because the lookup file was built from this same data
merge m:1 mtoid posterior using `grouprank', gen(_mrggrprank)
label var posterior_group_rank "rank of unique posterior value within participant (1=highest)"
tab _mrggrprank
tab posterior_group_rank
// NOTE(review): contents() is the pre-Stata-17 table syntax; confirm the Stata version used to run this
table posterior_group_rank, c(min posterior mean posterior max posterior)

// 1 if the record's posterior is among the participant's five highest distinct values, else 0
gen top5_posterior_values = posterior_group_rank <= 5
sum posterior_group_rank if top5_posterior_values == 1

// unique is not a built-in command; presumably the user-written package is installed
unique posterior if top5_posterior_values == 1 & posterior != .

// generate bins
// Earlier binned versions of the three measures, built with diagnostics that
// compare each bin variable with its raw value. None of the bin10 variables
// is kept in the saved files.
cap drop bin10_*
foreach measure in pretreatturnout postturnout postregistration {
	local raw tmp_`measure'
	local bin bin10_`measure'

	// bins of width .10 starting at 0, coded by their lower bound, then moved to the midpoint
	egen `bin' = cut(`raw'), at(0(.10)1) 
	replace `bin' = `bin' + .05

	// combine tiny # of records into neighboring bin (pre-treatment turnout only)
	if "`measure'" == "pretreatturnout" {
	  replace `bin' = .85 if `bin' >=.80 &  `bin' < .96
	}

	// exact end points get their own codes: 1 for non-missing values of 1 or more, 0 for exact zeros
	replace `bin' = 1 if `raw' >= 1 & !mi(`raw')
	replace `bin' = 0 if `raw' == 0

	// diagnostics: raw values left without a bin, range and count per bin, correlation
	tab `raw' if !mi(`raw') & mi(`bin')
	table `bin', contents(min `raw' max `raw' count `raw')
	pwcorr `raw' `bin'

	// regress the raw value on bin indicators; factor variables need integer
	// codes, so use a throwaway integer version of the bin
	gen bin10_junk = floor(`bin'*100)
	regress `raw' i.bin10_junk
	drop bin10_junk

	*twoway scatter tmp_`measure' bin10_`measure', name(`measure', replace)

	// cross-check missingness in both directions, then summarize both variables
	tab `bin' if mi(`raw')
	tab `raw' if mi(`bin')
	sum `raw' `bin'
}


// generate rounded
// Round every numeric measure to the nearest 0.1 and cap the result at 1.
// The non-missing guard is required: Stata ranks missing above every number,
// so without it a missing value would be recoded to 1.
foreach measure in pretreatturnout postturnout postregistration postregturnout {
	gen double r_`measure' = round(tmp_`measure', 0.1)
	replace r_`measure' = 1 if r_`measure' > 1 & !mi(r_`measure')
}


label variable r_pretreatturnout "r_pretreatturnout - pre RA voter turnout rounded by .1"
label variable r_postturnout "r_postturnout - post RA voter turnout rounded by .1"
label variable r_postregistration "r_postregistration - tmp_postturnout/tmp_postregturnout rounded by .1"
label variable r_postregturnout "post reg turnout rounded by .1"

// compare each rounded variable with its raw value: scatter plot and correlation.
// NOTE(review): this loop covers postregturnout but not postregistration - confirm that is intended
foreach measure in pretreatturnout postturnout postregturnout {
	twoway scatter tmp_`measure' r_`measure', name(gr_`measure', replace)
	pwcorr tmp_`measure' r_`measure'
}


// check variables
// summarize every variable whose name contains each stem
foreach stem in pretreat postregturnout postturn {
	summarize *`stem'*
}

// check combinations
// frequency of each distinct combination of the three rounded values,
// most common first; the grouping variable is only needed for this table
egen round_combo = group(r_pretreatturnout r_postturnout r_postregturnout)
tabulate round_combo, sort
drop round_combo

// keep vars
// NOTE(review): r_postregistration (post turnout / post reg turnout) is built
// earlier but is not in this list, while the file header names postregistration
// as an output - confirm that r_postregturnout is the variable meant to be released
keep mtoid posterior r_pretreatturnout r_postturnout r_postregturnout posterior_group_rank top5_posterior_values

// link to crosswalk to get famid
// mtoid in the voting file is matched to mto_pseudo_id in the crosswalk
rename mtoid mto_pseudo_id
merge m:1 mto_pseudo_id using "${outxwlk}", keepusing(ppid famid mto_pseudo_id mto_pseudo_famid) gen(_mrgxwlk)
// discard crosswalk rows that have no voting record, then require that every
// voting record was found in the crosswalk
keep if inlist(_mrgxwlk, 1, 3)
assert _mrgxwlk == 3
drop _mrgxwlk

// save out all matches
save "${outvotefull}", replace

// limit to top 5
keep if top5_posterior_values == 1
tab posterior_group_rank

// save with ids
save "${outvoteids}", replace

// save without ids
drop ppid famid
// List any remaining variables whose names contain an identifier-like stem.
// A bare capture hides all output, which made this check invisible in the log;
// capture noisily shows the listing (or the not-found message) and still lets
// the run continue when nothing matches.
foreach stem in ppid famid dob {
	capture noisily describe *`stem'*
}
des , f
// unique is not a built-in command; presumably the user-written package is installed
unique mto_pseudo_id

save "${outvotepseudo}", replace



log close




